require(data.table)
require(ggplot2)
require(ComplexUpset)
require(clusterProfiler)
require(psych)
require(lme4)
require(lmerTest)
require(pheatmap)
1 Introduction
The general idea is to find changes in the metabolic network of IBD patients which are associated with a change the disease activity scores. I analysed data sets for the reaction activity scores, the presence/absence of reactions after context specific model reconstruction and FVA analysis using a reaction based GLMM approach. This results in sets of reactions, which are associated with the disease activity. Furthermore, I have made set enrichments for the subsystems - again something which can be compared for the different analysis.
2 Results
2.1 Reaction association
First lets look at the reactions which are shared between the different analysis.
coefs.rxnExpr <- fread("./Statistics/results/rxnExpr.GLMM.Response.intervention_coefs.csv")
sigs.rxnExpr <- coefs.rxnExpr[padj < 0.05,]
coefs.PA <- fread("./Statistics/results/PA.GLMM.Response.intervention_coefs.csv")
sigs.PA <- coefs.PA[padj < 0.05,]
coefs.FVA <- fread("./Statistics/results/FVA.GLMM.Response.intervention_coefs.csv")
sigs.FVA <- coefs.FVA[padj < 0.05,]
sigs <- list(rxnExpr = sigs.rxnExpr,
PA = sigs.PA,
FVA = sigs.FVA)
# annotations
subsystems <- fread("./Statistics/dat/subsystems.csv")
rxnAnno <- fread("../../resources/recon-store-reactions-1.tsv")
Here is an upset plot for the reactions.
rxns <- unique(c(subsystems[,reaction], unlist(sapply(sigs,function(x) x[,rxn]))))
vennMat <- matrix(0, nrow = length(rxns), ncol = length(sigs), dimnames = list(rxns, names(sigs)))
for(set.nm in names(sigs)){
set <- sigs[[set.nm]]
vennMat[set[,rxn],set.nm] <- 1
}
upset(as.data.frame(vennMat), intersect = colnames(vennMat))
There are 3 reactions which are associated in all analysis with Response changes. Here is the complete list:
From all the reactions we can make another hypergeometric enrichment - which would be an overall assessment of the analysis performed.
hgt <- enricher(unique(unlist(sapply(sigs, function(x) x[,rxn]))), TERM2GENE = subsystems)
dotplot(hgt, x = "Count", showCategory = nrow(data.frame(hgt)))
Additionally, we can summarize the results from all data sets and do a GSEA as well.
# I need the cluster information to expand the estimates accordingly
cluster.rxnExpr <- fread("./Statistics/results/rxnExpr_DBSCAN_Cluster.csv")
cluster.PA <- fread("./Statistics/results/PA_DBSCAN_Cluster.csv")
cluster.FVA <- fread("./Statistics/results/FVA_DBSCAN_Cluster.csv")
# merge the cluster with the coef table
coefs.rxnExpr <- merge(coefs.rxnExpr,
cluster.rxnExpr[,.(rxn,rep.rxn)],
by.x = "rxn",
by.y = "rep.rxn",
suffix = c("",".clustered"),
keep.x = TRUE)
coefs.PA <- merge(coefs.PA,
cluster.PA[,.(rxn,rep.rxn)],
by.x = "rxn",
by.y = "rep.rxn",
suffix = c("",".clustered"),
keep.x = TRUE)
coefs.FVA <- merge(coefs.FVA,
cluster.FVA[,.(rxn,rep.rxn)],
by.x = "rxn",
by.y = "rep.rxn",
suffix = c("",".clustered"),
keep.x = TRUE)
# get the common columns
clmns <- intersect(colnames(coefs.rxnExpr), colnames(coefs.PA))
clmns <- intersect(colnames(coefs.FVA), clmns)
# merge on
coefs.all <- rbind(
cbind(coefs.rxnExpr[,..clmns], data.set = "rxnExpr"),
cbind(coefs.PA[,..clmns], data.set = "PA"),
cbind(coefs.FVA[,..clmns], data.set = "FVA"))
gseaTab <- coefs.all[,.(Estimate.sum = sum(Estimate)), by = rxn.clustered]
gseaVector <- gseaTab[,Estimate.sum]
names(gseaVector) <- gseaTab[,rxn.clustered]
gseaVector <- sort(gseaVector, decreasing = TRUE)
gsea.all <- GSEA(gseaVector,
TERM2GENE = subsystems,
minGSSize = 3,
maxGSSize = 1E6,
verbose = FALSE,
pvalueCutoff = 0.1)
if(nrow(data.frame(gsea.all)) > 0){
ridgeplot(gsea.all)
}
There question remains, how to display that in a concise manner - the answer: in a simple boxplot for all the significant reactions in the subsystems and their estimates.
sig.subs<- unique(c(data.frame(hgt)[,"ID"], data.frame(gsea.all)[,"ID"]))
coefs.all.subs <- merge(coefs.all, subsystems, by.x = "rxn.clustered", by.y = "reaction", keep.x = TRUE)
sub.order <- coefs.all.subs[,.(od = median(Estimate)), by = subsystem]
coefs.all.subs[,subsystem := factor(subsystem, level = sub.order[order(od), subsystem])]
plt.sub.est.all <- ggplot(coefs.all.subs[subsystem %in% sig.subs,], aes(x= sin(Estimate)*log(abs(1/Estimate)+1), y = subsystem)) +
geom_vline(xintercept = 0, color = "red", linetype = 2)+
geom_boxplot(fill = NA) +
ggtitle("All rxns")+
theme_bw()
plt.sub.est.sig <- ggplot(coefs.all.subs[subsystem %in% sig.subs & padj <=0.05,], aes(x= sin(Estimate)*log(abs(1/Estimate)+1), y = subsystem)) +
geom_vline(xintercept = 0, color = "red", linetype = 2)+
geom_boxplot(fill = NA) +
ggtitle("Sig. rxns")+
theme_bw()
cowplot::plot_grid(plt.sub.est.all, plt.sub.est.sig)
2.2 Subsystem association
Similar we can make a summary of the subsystems by comparing the different subsystems which have been enriched in the different analysis and with GSEA and HGT.
gsea.rxnExpr <- readRDS("Statistics/results/rxnExpr.GSEA.Response.intervention_GSEAResult.RDS")
gsea.PA <- readRDS("Statistics/results/PA.GSEA.Response.intervention_GSEAResult.RDS")
gsea.FVA <- readRDS("Statistics/results/FVA.GSEA.Response.intervention_GSEAResult.RDS")
vennMat.subsys <- matrix(0, ncol = 2*length(sigs), nrow = length(unique(subsystems[,subsystem])),
dimnames = list(unique(subsystems[,subsystem]),
paste0(sort(rep(c("hgt.","gsea."), length(sigs))), rep(names(sigs),2)))
)
for(n in names(sigs)){
hgt <- enricher(sigs[[n]][,rxn],TERM2GENE = subsystems)
vennMat.subsys[data.frame(hgt)[,"ID"], paste0("hgt.",n)] <- 1
gsea <- get(paste0("gsea.",n))
vennMat.subsys[data.frame(gsea)[,"ID"], paste0("gsea.",n)] <- 1
}
upset(as.data.frame(vennMat.subsys), intersect = colnames(vennMat.subsys))
dt.vennMat.subsys <- data.table(vennMat.subsys, total = rowSums(vennMat.subsys), keep.rownames = TRUE)
DT::datatable(dt.vennMat.subsys[total > 1,])
There is(are) exactly 0 subsystem(s) which is(are) found across all analysis. That is:
tbl.subsys <- rxnAnno[subsystem %in% rownames(vennMat.subsys)[rowSums(vennMat.subsys) == ncol(vennMat.subsys)],]
DT::datatable(tbl.subsys)
2.3 Diversity measures
An interesting finding was, that generally there were more reactions which, if present/active, there was a reduction in disease activity. This can be either an effect that certain reactions are really blocked/reduced in activity if Response scores are high or it is a result of a reduced metabolic diversity in higher inflammation states. To test that one can correlate the Response score with the diversity measures of the different analysis.
diversity <- fread("Statistics/results/diversityMeasures.csv")
pairs.panels(diversity[,.(Remission,Response,HB_Mayo_impu,richness.PA, shannon.rxnExpr, shannon.FVA)],
method = "spearman",
stars = TRUE,
main = "Spearman's rho")
So there are slight correlation, which poses the question, whether this signal remains, if tested in a linear mixed model to correct for PatientID independence.
mod.richness.PA <- glmer(data = diversity,
factor(Response) ~ richness.PA + (1|PatientID),
family = binomial)
mod.shannon.rxnExpr <- glmer(data = diversity,
factor(Response) ~ shannon.rxnExpr + (1|PatientID),
family = binomial)
mod.shannon.FVA <- glmer(data = diversity,
factor(Response) ~ shannon.FVA + (1|PatientID),
family = binomial)
stats <- data.table(rbind(
coef(summary(mod.richness.PA)),
coef(summary(mod.shannon.rxnExpr)),
coef(summary(mod.shannon.FVA))),
keep.rownames = TRUE)[rn != "(Intercept)",]
stats[,padj := p.adjust(`Pr(>|z|)`, method = "BH")]
DT::datatable(stats)
There is a decrease of diversity in FVA ranges with the increase in Response signal - that is interesting indeed.
2.4 PCA for clustering
Another idea is to use the different significant reactions and use the data to make a ordination to get a feeling how well we can separate the different samples form each other by Response scores.
rxnExpr <- read.csv("../results/RxnExpression/rxnExpr.GL10-L50-GU90.colormore3D.csv", row.names=1)
rxnExpr <- rxnExpr[sigs.rxnExpr[,rxn],]
PA <- read.csv("../results/ModelAnalysis/rxnIdMat.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
PA <- PA[sigs.PA[,rxn],]
# make PA to integer
PA2 <- PA
PA2[,] <- 0
PA2[PA == "True"] <- 1
PA <- PA2
FVA.range <- read.csv("../results/FVA/rangeFVA.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
FVA.range <- FVA.range[sigs.FVA[coef == "range",rxn],]
FVA.center <- read.csv("../results/FVA/centerFVA.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
FVA.center <- FVA.center[sigs.FVA[coef == "center",rxn],]
# transform the PA data to jaccard distances, otherwise it is less meaningful
PA.dist <- as.matrix(dist(t(PA), method = "binary"))
mat.all <- t(rbind(rxnExpr, PA.dist, FVA.center, FVA.range))
pca <- prcomp(mat.all, scale = TRUE, center = TRUE)
pca.x <- merge(data.table(pca[["x"]], keep.rownames = TRUE), diversity, by.x = "rn", by.y = "SeqID")
pca.var <- pca[["sdev"]]^2/sum(pca[["sdev"]]^2)*100
pca.plot <- ggplot(pca.x, aes(x=PC1,y=PC2)) +
#scale_color_gradient(low = "blue", high ="red") +
geom_point(aes(color = Response, shape = Diagnosis),size = 3) +
theme_bw() +
facet_grid(~Diagnosis) +
labs(y = paste0("PC2 (", round(pca.var[2],2),"%)"),
x = paste0("PC1 (", round(pca.var[1],2),"%)"),
color = "Response score")
pca.plot
pca.plot <- ggplot(pca.x, aes(x=PC1,y=PC2)) +
scale_color_gradient(low = "blue", high ="red") +
geom_point(aes(color = log10(Time_seq+1), shape =Response),size = 3) +
theme_bw() +
facet_grid(~Diagnosis) +
labs(y = paste0("PC2 (", round(pca.var[2],2),"%)"),
x = paste0("PC1 (", round(pca.var[1],2),"%)"))
pca.plot
# only the rxns common in all
rxns <- rownames(vennMat)[rowSums(vennMat) == ncol(vennMat)]
PA.common.dist <- as.matrix(dist(t(PA[rxns,])))
FVA.common.range <- FVA.range[rownames(FVA.range) %in% rxns,]
FVA.common.center <- FVA.center[rownames(FVA.center) %in% rxns,]
rxnExpr.common <- rxnExpr[rxns,]
mat.common.all <- t(rbind(rxnExpr.common, PA.common.dist, FVA.common.range))
pca.common <- prcomp(mat.common.all, scale = TRUE, center = TRUE)
#
pca.common.x <- merge(data.table(pca.common[["x"]], keep.rownames = TRUE), diversity, by.x = "rn", by.y = "SeqID")
pca.common.var <- pca.common[["sdev"]]^2/sum(pca.common[["sdev"]]^2)*100
#
pca.common.plot <- ggplot(pca.common.x, aes(x=PC1,y=PC2)) +
# scale_color_gradient(low = "blue", high ="red") +
geom_point(aes(color = Response, shape = Diagnosis),size = 3) +
theme_bw() +
facet_grid(~Diagnosis) +
labs(y = paste0("PC2 (", round(pca.common.var[2],2),"%)"),
x = paste0("PC1 (", round(pca.common.var[1],2),"%)"),
color = "Response")
pca.common.plot